Loading the required libraries
library(tidyverse)
library(htmltools)
library(htmlwidgets)
library(leaflet)
library(leaflet.extras)
library(revgeo)
library(rgdal)
if (!require(geojsonio)) {
install.packages("geojsonio")
library(geojsonio)
}
library(sp)
library(maps)
library(ggmap)
library(maptools)
Reading in the datasets
crime_df = readRDS(file = "datasets/nyc_felony_misdemeanor.rds")
Reverse geocoding to get the zip codes
#### Sampling the just year 2017 and selecting all felony rape/attempted rape reports
zip_df = crime_df %>% filter(pd_cd %in% c(153, 157, 159, 155)) %>% filter(year == 2017) %>%
mutate(longitude = as.numeric(longitude),
latitude = as.numeric(latitude))
### Sampling 500 because so as not to exceed the API query limit
zip_df_1 = zip_df %>% sample_n(500)
### Reverse geocoding each longitude and latitude
zip_names_1 = zip_df_1 %>%
mutate(zip = map2(.x = longitude, .y = latitude, ~ revgeo(longitude = .x , latitude= .y, output = "hash", item = "zip")))
### SAving the dataset so we don't call the API to everytime
saveRDS(zip_names_1, file = "datasets/nyc_rape_zip.rds")
### Using antijoin to find the other data set
zip_df_2 = anti_join(zip_df, zip_df_1, by = "cmplnt_num")
### Reverse geocoding to get their zip codes
zip_names_2 = zip_df_2 %>%
mutate(zip = map2(.x = longitude, .y = latitude, ~ revgeo(longitude = .x , latitude= .y, output = "hash", item = "zip")))
### SAving the dataset so we don't call the API to everytime
saveRDS(zip_names_2, file = "./datasets/nyc_rape_zip_2.rds")
zip_df_1 = readRDS(file = "./datasets/nyc_rape_zip.rds")
zip_df_2 = readRDS(file = "./datasets/nyc_rape_zip_2.rds")
all_zip_df = bind_rows(zip_df_1, zip_df_2)
all_zip_df = all_zip_df %>% mutate(zip = as.character(zip)) %>%
mutate(zip = str_replace(zip, "list\\(zip =", ""),
zip = str_replace(zip, "\\)", ""),
zipcode = gsub('"', '', zip),
zipcode = str_replace(zipcode, ":[0-9][0-9][0-9][0-9][0-9]", ""),
zipcode = str_replace(zipcode, "-[0-9][0-9][0-9][0-9]", ""),
zipcode = as.numeric(zipcode),
zipcode = as.factor(zipcode))
all_zip_df %>%
summarize_all(funs(sum(is.na(.))))
## addr_pct_cd boro_nm cmplnt_fr_dt cmplnt_fr_tm cmplnt_num cmplnt_to_dt
## 1 0 0 0 0 0 191
## cmplnt_to_tm crm_atpt_cptd_cd jurisdiction_code ky_cd lat_lon_type
## 1 190 0 0 0 0
## lat_lon_coordinates latitude law_cat_cd loc_of_occur_desc longitude
## 1 0 0 0 117 0
## ofns_desc parks_nm pd_cd pd_desc prem_typ_desc rpt_dt susp_age_group
## 1 0 1144 0 0 1 0 80
## susp_race susp_sex vic_age_group vic_race vic_sex x_coord_cd y_coord_cd
## 1 80 80 0 0 0 0 0
## year zip zipcode
## 1 0 0 0
group_zip = all_zip_df %>% group_by(zipcode) %>% summarize(number = n())
map_url = "http://data.beta.nyc//dataset/3bf5fb73-edb5-4b05-bb29-7c95f4a727fc/resource/6df127b1-6d04-4bb7-b983-07402a2c3f90/download/f4129d9aa6dd4281bc98d0f701629b76nyczipcodetabulationareas.geojson"
download.file(map_url, "./datasets/nyc_zip_map.geojson")
map_json <- geojson_read("./datasets/nyc_zip_map.geojson", what = "sp")
map_json@data =
map_json@data %>%
left_join(group_zip, by = c("postalCode" = "zipcode")) %>% mutate(number = as.numeric(number)) %>%
mutate(number = if_else(is.na(number), 0, number))
Now let’s plot our polygons
# create color palette with colorNumeric()
nyc_pal <- colorNumeric("Reds", domain = NULL)
map_json %>%
leaflet() %>%
#addTiles() %>%
#addProviderTiles("OpenStreetMap.BlackAndWhite")%>%
setView(lat = 40.7, lng = -74.0, zoom = 11) %>%
addPolygons(weight = 2, group = "zipcode",
color = nyc_pal(map_json@data$number),
fillOpacity = 0.5, opacity = 5,
smoothFactor = 0.2,
# add labels that display mumber
label = ~ paste("Zip Code: ", postalCode, "Number of reported rape: ", number),
# highlight polygons on hover
highlight = highlightOptions(weight = 2, color = "black",
bringToFront = TRUE)) %>%
addSearchFeatures(options = searchFeaturesOptions(zoom = 12), targetGroups = "zipcode")
m1 <- crime_df %>%
filter(law_cat_cd == "felony" & (pd_cd %in% c(109, 198, 386, 397))) %>%
leaflet() %>%
addTiles() %>%
#addProviderTiles("CartoDB") %>%
setView(lat = 40.7, lng = -74.0, zoom = 10) %>%
addCircleMarkers(lng = ~ as.numeric(longitude),
lat = ~ as.numeric(latitude), radius = 1, label = ~ pd_desc,
clusterOptions = markerClusterOptions())
m1